# -*- coding: utf-8 -*-

import re
import requests
import datetime
from bs4 import BeautifulSoup
import pandas as pd
from fake_useragent import UserAgent

# Scrape the Hupu BBS "all-gambia" hot-topic board and export titles + links
# to a dated Excel file.

Domain_Name = 'https:'  # NOTE(review): not used in this chunk — possibly referenced elsewhere; verify before removing.

BASE_URL = 'https://bbs.hupu.com'
url = f'{BASE_URL}/all-gambia'

headers = {
    # Random User-Agent plus a matching Referer to reduce the chance of being blocked.
    'User-Agent': UserAgent().random,
    'Referer': "https://bbs.hupu.com/all-gambia"
}

r = requests.get(url, headers=headers)
r.raise_for_status()  # fail fast on HTTP errors instead of parsing an error page

# The page may not declare its charset correctly; trust the detected encoding.
r.encoding = r.apparent_encoding
soup = BeautifulSoup(r.text, "html.parser")
print('----------------分割线----------------')

# 爬模块: locate the hot-topics container; bail out with a clear message
# instead of an AttributeError if the page layout has changed.
bbsHotPit = soup.find('div', class_='bbsHotPit')
if bbsHotPit is None:
    raise SystemExit('未找到热帖区域（页面结构可能已变化）')
bbs_hot = bbsHotPit.find_all('div', class_='list')

# 存储: collected rows of [question title, " 网址：<full url>"].
dts = []
# 在每个板块下面提取出标题等
for module in bbs_hot:
    # Section heading for this board block.
    module_topic = module.find('span', class_='weight').get_text()
    print(module_topic)
    for each_module_question in module.find_all('span', class_='textSpan'):
        # Hoist the single <a> lookup (the original called .find('a') twice)
        # and skip malformed entries instead of crashing on None.
        a = each_module_question.find('a')
        if a is None:
            continue
        each_q = a.get('title')
        each_url = a.get('href')
        full_url = f"{BASE_URL}/{each_url}"
        # Leading space in the stored cell matches the original output exactly.
        dts.append([each_q, f" 网址：{full_url}"])
        print(f"{each_q} 网址：{full_url}")

df = pd.DataFrame(dts, columns=['问题名称', '链接'])
# BUG FIX: `encoding=` was removed from DataFrame.to_excel in pandas 1.2 and
# raises TypeError on modern pandas; xlsx is a zip container and needs no
# text-encoding argument, so it is simply dropped.
df.to_excel(f"./HUPU热搜榜{datetime.datetime.now():%Y%m%d}.xlsx")  # 写入excel中
print('爬取完成')

# for i in range(len(content)):
#     lst = []
#     lst.append(content[i])
#     lst.append(hot[i])
#     lst.append(str(url[i]).replace('u002F', ''))
#     dts.append(lst)
# df = pd.DataFrame(dts, columns=['问题名称', '问题热度', '链接'])
# df.to_excel('./zhihu热搜榜' + str(datetime.datetime.now().strftime('%Y%m%d')) + '.xlsx',
#             encoding='gbk')  # 写入excel中
# print('爬取完成')

